/***************************************************************************************
Firm-embedded productivity and cross-country income differences
Alviarez, Cravino and Ramondo
Journal of Political Economy (2022)

Program: clean_02_orbis.do
Date: October 2022

Description: Clean Orbis data for aggregate level analysis 

*****************************************************************************************/


/*
*-----------------------------------------------------------------
*(In SAS) Aggregate employment and sales (now we are also including wage bill)
*-----------------------------------------------------------------
ods html close; 
proc import out= orbis_historical datafile = "${intermediate}/final_all_firms_20200123.dta";
run;

data orbis_historical_2;
 set orbis_historical;
run;

data orbis_historical_2; 
set orbis_historical_2;
isocode=substr(bvd_id,1,2);
run;

data orbis_historical_2; 
set orbis_historical_2;
if dupli=0 ;
run;

data orbis_historical_3;
   set orbis_historical_2;
   array change _numeric_;
        do over change;
            if change<0 then change=.;
        end;
 run ;

PROC MEANS DATA=orbis_historical_3 NWAY NOPRINT ;
  CLASS year isocode industry_code;
  VAR turn emp exp_rev val_added;
  OUTPUT OUT=orbis_sum SUM(turn emp exp_rev val_added)= turn emp exp_rev val_added;
RUN;

data "${intermediate}/orbis_historical_agg.sas7bdat";
set orbis_sum;
run;

proc export data=orbis_sum outfile= "${intermediate}/orbis_historical_agg.dta";
run;


*------------------------------------------
*MNCs employment and sales 
*------------------------------------------
ods html close; 
proc import out= orbis_historical datafile = "${intermediate}/final_all_firms_20200123.dta";
run;

data orbis_historical_2;
 set orbis_historical;
run;

data orbis_historical_2; 
set orbis_historical_2;
isocode=substr(bvd_id,1,2);
hq=substr(guo_50,1,2);
run;

data orbis_historical_2; 
set orbis_historical_2;
if dupli=0 ;
if isocode~=hq ;
run;

data orbis_historical_3;
   set orbis_historical_2;
   array change _numeric_;
        do over change;
            if change<0 then change=.;
        end;
run;

proc export data=orbis_historical_3 outfile= "${intermediate}/orbis_historical_mnc.dta";
run;

*/



*-----------------------------------------------------------------------
*2021 ORBIS aggregate level data (we have the code in SAS in order to produce this collapses files. I will add that information in the Read Me file)
*-----------------------------------------------------------------------
clear all
set more off
global typeden=0 
do "TFP_firm_directory_historical.do"

*List of aggregates so far
*-----------------------------------------------
*all all_nlf
*saemvawb saemvawb_nlf
*vawb vawb_nlf
*saemva saemva_nlf
*emva emva_nlf
*saem saem_nlf
*sawb sawb_nlf
*savawb savawb_nlf
*old list all saem_nlf saem vawb_nlf vawb


foreach selectedvar in  all emva saem saemva saemvawb savawb sawb vawb  {
display "`selectedvar'"


***local selectedvar all
clear all
import delimited "${tfp_bef}/ORBIS_2018/orbis_batch2021_collapsed/agg_orbis_`selectedvar'.txt", bindquote(strict) stripquote(yes) clear 
drop  materialcosts
tab consolidationcode
drop if consolidationcode=="C2" | consolidationcode=="C1" 
drop if consolidationcode=="LF"

*Pre-clean industrial code
*-----------------------------------------
rename naicscorecode4digits NAICS4
tostring NAICS4, replace
replace NAICS4="" if NAICS4=="."
gen ll=length(NAICS4)
replace NAICS4=NAICS4+"0" if ll==3
replace NAICS4=NAICS4+"00" if ll==2
replace NAICS4=NAICS4+"000" if ll==1
drop ll

*Pre-clean financial variables
*-----------------------------------------
rename turn GO_usd
rename numberofemployees EMP
rename addedvalue VA_usd
rename exportrevenue EXP_usd
rename costsofemployees LAB_usd

replace EMP=EMP/1000
foreach vv in GO_usd EXP_usd VA_usd LAB_usd {
replace `vv'=`vv'/1000000
}
*

*Isocodes
*----------------------------------------   
global cond1 `"  isocode=="AR" | isocode=="AT" | isocode=="AU" | isocode=="BE" | isocode=="BG" | isocode=="BR" | isocode=="CA" |  isocode=="CH" | isocode=="CL" | isocode=="CN" | isocode=="CY" | isocode=="CO" | isocode=="CZ" |  isocode=="DE" |  isocode=="DK" |  isocode=="EE" |  isocode=="ES" |  isocode=="FI" |  isocode=="FR" |  isocode=="GB" |  isocode=="GR" |  isocode=="HK" |  isocode=="HU" |  isocode=="HR" |  isocode=="ID" |  isocode=="IL" |  isocode=="IE" | isocode=="IN" |  isocode=="IS" |  isocode=="IT" |  isocode=="JP" |  isocode=="KR" | isocode=="LT" | isocode=="LV" | isocode=="LU" |  isocode=="MX" |  isocode=="MT" |  isocode=="MY" |  isocode=="NL" |   isocode=="NO" |  isocode=="NZ" |  isocode=="PE" |  isocode=="PL" |  isocode=="PT" |  isocode=="RO" |  isocode=="RS" |   isocode=="RU" | isocode=="SE" |  isocode=="SG" | isocode=="SI" | isocode=="SK" | isocode=="TH" | isocode=="TR" | isocode=="TW" | isocode=="UA" |  isocode=="US" | isocode=="VE" |  isocode=="ZA"   "'
gen a=($cond1)
tab a
keep if a==1
drop a


*Sector classification* 
*------------------------------------------------------------------
gen NAICS2=substr(NAICS4,1,2)
gen NAICS3=substr(NAICS4,1,3)


gen sector="NA"
replace sector="NA" if NAICS2=="" | NAICS2=="NA"
replace sector="Agriculture (A)" if NAICS2=="11"
replace sector="Mining (B)" if NAICS2=="21"

replace sector="Food (10-12)" if NAICS3=="311" | NAICS3=="312"
replace sector="Textiles (13-15)" if NAICS3=="313" | NAICS3=="314" | NAICS3=="315" | NAICS3=="316"
replace sector="Wood (16-18)" if NAICS3=="321" | NAICS3=="322" | NAICS3=="323"
replace sector="Petroleum (19)" if NAICS3=="324"
replace sector="Chemicals (20-21)" if NAICS3=="325"
replace sector="Plastic (22-23)" if NAICS3=="326" | NAICS3=="327"
replace sector="Basic Metals (24-25)" if NAICS3=="331" | NAICS3=="332"
replace sector="Electrical_Equip (26-27)" if NAICS3=="334" |  NAICS3=="335" 
replace sector="Machinery (28)" if NAICS3=="333" 
replace sector="Transport (29-30)" if NAICS3=="336"
replace sector="Other_Manufacturing (31-33)" if NAICS3=="337" | NAICS3=="339"

replace sector="Electricity (D-E)" if NAICS2=="22"
replace sector="Construction (F)" if NAICS2=="23"
replace sector="Wholesale_Retail (G)" if NAICS2=="42" | NAICS2=="44" | NAICS2=="45"
***replace sector="Wholesale_Retail (G)" if NAICS2=="41" | NAICS2=="44" | NAICS2=="45" *** no entiendo porque saque el 42 de aca??? Tampoco hay NAICS 41

replace sector="Transportation_Storage (H)" if NAICS2=="48" | NAICS2=="49"
replace sector="Accomodation (I)" if NAICS2=="72"
replace sector="Information (J)" if  NAICS2=="51"
replace sector="Financial_Insurance (K)" if  NAICS2=="52"
replace sector="Real_Estate (L)" if  NAICS2=="53"
replace sector="Support_Services (M-N)" if NAICS2=="54" | NAICS2=="55" | NAICS2=="56"
replace sector="Public_Administration (O)" if NAICS2=="92"
replace sector="Education (P)" if  NAICS2=="61"
replace sector="Health (Q)" if  NAICS2=="62"
replace sector="Recreation (R-S)" if NAICS2=="71" | NAICS2=="81"


*Do further aggregations (to avoid iso-sector codes without information. But notice that by eliminating the variable when at least one sector is missing we are not helping that RIGHT?)
gen sector2=sector
replace sector2="TexWood (13-18)" if sector=="Textiles (13-15)" | sector=="Wood (16-18)"
replace sector2="ChePetPla (19-23)" if sector=="Chemicals (20-21)" | sector=="Petroleum (19)" | sector=="Plastic (22-23)"
replace sector2="ElecMach (26-28)" if sector=="Electrical_Equip (26-27)" | sector=="Machinery (28)"
replace sector2="TranspOtherManuf (29-33)" if sector=="Transport (29-30)" | sector=="Other_Manufacturing (31-33)"
replace sector2="AgrMining (A-B)" if sector=="Agriculture (A)" | sector=="Mining (B)"
replace sector2="Accomod_Recreat (I-R-S)" if sector=="Accomodation (I)" | sector=="Recreation (R-S)"
gen sec_count=1
replace sec_count=2 if sector2=="TexWood (13-18)"
replace sec_count=3 if sector2=="ChePetPla (19-23)"
replace sec_count=2 if sector2=="ElecMach (26-28)"
replace sec_count=2 if sector2=="TranspOtherManuf (29-33)"
replace sec_count=2 if sector2=="AgrMining (A-B)"
replace sec_count=2 if sector2=="Accomod_Recreat (I-R-S)"

*Replace by missing if any of them is not available
/*
foreach var of varlist GO_usd EMP EXP_usd VA_usd LAB_usd {
display "`var'"
by year isocode sector2, sort: egen xx=count(`var')
replace `var'=. if xx<sec_count
drop xx
}
*
*/

include "${code}/label_collapse_bef.do"
collapse (sum) GO_usd EMP EXP_usd VA_usd LAB_usd number_turm number_emp, by(isocode year sector2)
include "${code}/label_collapse_aft.do"
rename sector2 sector

tab sector 
tab isocode if sector=="NA" | sector==""


*Collapse the financial variables using the sector variable 
*---------------------------------------------------------
foreach vv in GO_usd VA_usd EMP EXP_usd LAB_usd {
display "`vv'"
replace `vv'=. if `vv'<=0
}
label variable GO_usd "Gross output at current prices (MM USD)"
label variable VA_usd "Value added, current prices (MM USD)"
label variable EMP "Number of employees (Thousands)"
label variable EXP_usd "Exports, current prices (MM USD)"
label variable LAB_usd "Labor Cost, current prices (MM USD)"

drop if isocode=="CY" | isocode=="ID" | isocode=="IL"| isocode=="MT" | isocode=="MY" | isocode=="VE" | isocode=="ZA"   
drop if GO_usd==. & VA_usd==. &  EMP==. &  EXP_usd==. & LAB_usd==.
format %9.0fc GO_usd VA_usd EXP_usd LAB_usd number_turm number_emp
format %9.2fc EMP
sort year isocode sector
compress 

*Keep only the period under analysis 
keep if year>=2005 & year<=2017


foreach var of varlist GO_usd VA_usd EMP EXP_usd LAB_usd number_turm number_emp {
rename `var' `var'_orbis_`selectedvar'
}
save "${output}/orbis_historical_agg_allyears_`selectedvar'.dta", replace

}


*---------------------------------------------------------------------
*Merge this files in one 
*---------------------------------------------------------------------
clear all
use "${output}/orbis_historical_agg_allyears_vawb.dta"
foreach selectedvar in  all emva saem saemva saemvawb savawb sawb vawb {
display "`selectedvar'"
merge 1:1 year isocode sector using "${output}/orbis_historical_agg_allyears_`selectedvar'.dta"
drop _merge
}
order isocode year sector GO_usd_orbis* EMP_orbis* VA_usd_orbis* LAB_usd_orbis* EXP_usd_orbis* number_turm* number_emp*

compress
tempfile temp
save `temp', replace 
save "${output}/orbis_historical_agg_allyears_allvariables.dta", replace

*Save it in case we have any problem. Also we can compare afterwards. 
***save "${output}/orbis_historical_agg_allyears_allvariables_06142022.dta", replace



*-----------------------------------------------------------------------------
**** Considering (aggregates) with only the first 20 firms in each economy ****
*-----------------------------------------------------------------------------
clear all
import delimited "${tfp_bef}/ORBIS_2018/orbis_batch2021_collapsed/agg_orbis_all_forcollapse.txt", bindquote(strict) stripquote(yes) clear 
drop  materialcosts
tab consolidationcode
drop if consolidationcode=="C2" | consolidationcode=="C1" 
drop if consolidationcode=="LF"

*Pre-clean industrial code
*-----------------------------------------
rename naicscorecode4digits NAICS4
tostring NAICS4, replace
replace NAICS4="" if NAICS4=="."
gen ll=length(NAICS4)
replace NAICS4=NAICS4+"0" if ll==3
replace NAICS4=NAICS4+"00" if ll==2
replace NAICS4=NAICS4+"000" if ll==1
drop ll

*Pre-clean financial variables
*-----------------------------------------
rename turn GO_usd
rename numberofemployees EMP
rename addedvalue VA_usd
rename exportrevenue EXP_usd
rename costsofemployees LAB_usd

replace EMP=EMP/1000
foreach vv in GO_usd EXP_usd VA_usd LAB_usd {
replace `vv'=`vv'/1000000
}
*

*Isocodes
*----------------------------------------   
global cond1 `"  isocode=="AR" | isocode=="AT" | isocode=="AU" | isocode=="BE" | isocode=="BG" | isocode=="BR" | isocode=="CA" |  isocode=="CH" | isocode=="CL" | isocode=="CN" | isocode=="CY" | isocode=="CO" | isocode=="CZ" |  isocode=="DE" |  isocode=="DK" |  isocode=="EE" |  isocode=="ES" |  isocode=="FI" |  isocode=="FR" |  isocode=="GB" |  isocode=="GR" |  isocode=="HK" |  isocode=="HU" |  isocode=="HR" |  isocode=="ID" |  isocode=="IL" |  isocode=="IE" | isocode=="IN" |  isocode=="IS" |  isocode=="IT" |  isocode=="JP" |  isocode=="KR" | isocode=="LT" | isocode=="LV" | isocode=="LU" |  isocode=="MX" |  isocode=="MT" |  isocode=="MY" |  isocode=="NL" |   isocode=="NO" |  isocode=="NZ" |  isocode=="PE" |  isocode=="PL" |  isocode=="PT" |  isocode=="RO" |  isocode=="RS" |   isocode=="RU" | isocode=="SE" |  isocode=="SG" | isocode=="SI" | isocode=="SK" | isocode=="TH" | isocode=="TR" | isocode=="TW" | isocode=="UA" |  isocode=="US" | isocode=="VE" |  isocode=="ZA"   "'
gen a=($cond1)
tab a
keep if a==1
drop a


*Sector classification* 
*------------------------------------------------------------------
gen NAICS2=substr(NAICS4,1,2)
gen NAICS3=substr(NAICS4,1,3)


gen sector="NA"
replace sector="NA" if NAICS2=="" | NAICS2=="NA"
replace sector="Agriculture (A)" if NAICS2=="11"
replace sector="Mining (B)" if NAICS2=="21"

replace sector="Food (10-12)" if NAICS3=="311" | NAICS3=="312"
replace sector="Textiles (13-15)" if NAICS3=="313" | NAICS3=="314" | NAICS3=="315" | NAICS3=="316"
replace sector="Wood (16-18)" if NAICS3=="321" | NAICS3=="322" | NAICS3=="323"
replace sector="Petroleum (19)" if NAICS3=="324"
replace sector="Chemicals (20-21)" if NAICS3=="325"
replace sector="Plastic (22-23)" if NAICS3=="326" | NAICS3=="327"
replace sector="Basic Metals (24-25)" if NAICS3=="331" | NAICS3=="332"
replace sector="Electrical_Equip (26-27)" if NAICS3=="334" |  NAICS3=="335" 
replace sector="Machinery (28)" if NAICS3=="333" 
replace sector="Transport (29-30)" if NAICS3=="336"
replace sector="Other_Manufacturing (31-33)" if NAICS3=="337" | NAICS3=="339"

replace sector="Electricity (D-E)" if NAICS2=="22"
replace sector="Construction (F)" if NAICS2=="23"
replace sector="Wholesale_Retail (G)" if NAICS2=="42" | NAICS2=="44" | NAICS2=="45"
***replace sector="Wholesale_Retail (G)" if NAICS2=="41" | NAICS2=="44" | NAICS2=="45" *** no entiendo porque saque el 42 de aca??? Tampoco hay NAICS 41

replace sector="Transportation_Storage (H)" if NAICS2=="48" | NAICS2=="49"
replace sector="Accomodation (I)" if NAICS2=="72"
replace sector="Information (J)" if  NAICS2=="51"
replace sector="Financial_Insurance (K)" if  NAICS2=="52"
replace sector="Real_Estate (L)" if  NAICS2=="53"
replace sector="Support_Services (M-N)" if NAICS2=="54" | NAICS2=="55" | NAICS2=="56"
replace sector="Public_Administration (O)" if NAICS2=="92"
replace sector="Education (P)" if  NAICS2=="61"
replace sector="Health (Q)" if  NAICS2=="62"
replace sector="Recreation (R-S)" if NAICS2=="71" | NAICS2=="81"


*Do further aggregations (to avoid iso-sector codes without information. But notice that by eliminating the variable when at least one sector is missing we are not helping that RIGHT?)
gen sector2=sector
replace sector2="TexWood (13-18)" if sector=="Textiles (13-15)" | sector=="Wood (16-18)"
replace sector2="ChePetPla (19-23)" if sector=="Chemicals (20-21)" | sector=="Petroleum (19)" | sector=="Plastic (22-23)"
replace sector2="ElecMach (26-28)" if sector=="Electrical_Equip (26-27)" | sector=="Machinery (28)"
replace sector2="TranspOtherManuf (29-33)" if sector=="Transport (29-30)" | sector=="Other_Manufacturing (31-33)"
replace sector2="AgrMining (A-B)" if sector=="Agriculture (A)" | sector=="Mining (B)"
replace sector2="Accomod_Recreat (I-R-S)" if sector=="Accomodation (I)" | sector=="Recreation (R-S)"
gen sec_count=1
replace sec_count=2 if sector2=="TexWood (13-18)"
replace sec_count=3 if sector2=="ChePetPla (19-23)"
replace sec_count=2 if sector2=="ElecMach (26-28)"
replace sec_count=2 if sector2=="TranspOtherManuf (29-33)"
replace sec_count=2 if sector2=="AgrMining (A-B)"
replace sec_count=2 if sector2=="Accomod_Recreat (I-R-S)"

*Replace by missing if any of them is not available
/*
foreach var of varlist GO_usd EMP EXP_usd VA_usd LAB_usd {
display "`var'"
by year isocode sector2, sort: egen xx=count(`var')
replace `var'=. if xx<sec_count
drop xx
}
*
*/

include "${code}/label_collapse_bef.do"
collapse (sum) GO_usd EMP EXP_usd VA_usd LAB_usd number_turm number_emp, by(isocode year sector2)
include "${code}/label_collapse_aft.do"
rename sector2 sector

tab sector 
tab isocode if sector=="NA" | sector==""


*Collapse the financial variables using the sector variable 
*---------------------------------------------------------
foreach vv in GO_usd VA_usd EMP EXP_usd LAB_usd {
display "`vv'"
replace `vv'=. if `vv'<=0
}
label variable GO_usd "Gross output at current prices (MM USD)"
label variable VA_usd "Value added, current prices (MM USD)"
label variable EMP "Number of employees (Thousands)"
label variable EXP_usd "Exports, current prices (MM USD)"
label variable LAB_usd "Labor Cost, current prices (MM USD)"

drop if isocode=="CY" | isocode=="ID" | isocode=="IL"| isocode=="MT" | isocode=="MY" | isocode=="VE" | isocode=="ZA"   
drop if GO_usd==. & VA_usd==. &  EMP==. &  EXP_usd==. & LAB_usd==.
format %9.0fc GO_usd VA_usd EXP_usd LAB_usd number_turm number_emp
format %9.2fc EMP
sort year isocode sector
compress 

*Keep only the period under analysis 
keep if year>=2005 & year<=2017


foreach var of varlist GO_usd VA_usd EMP EXP_usd LAB_usd number_turm number_emp {
rename `var' `var'_orbis_`selectedvar'
}
save "${output}/orbis_historical_agg_allyears_`selectedvar'.dta", replace

}


*---------------------------------------------------------------------
*Merge this files in one 
*---------------------------------------------------------------------
clear all
use "${output}/orbis_historical_agg_allyears_vawb.dta"
foreach selectedvar in  all emva saem saemva saemvawb savawb sawb vawb {
display "`selectedvar'"
merge 1:1 year isocode sector using "${output}/orbis_historical_agg_allyears_`selectedvar'.dta"
drop _merge
}
order isocode year sector GO_usd_orbis* EMP_orbis* VA_usd_orbis* LAB_usd_orbis* EXP_usd_orbis* number_turm* number_emp*

compress
tempfile temp
save `temp', replace 
save "${output}/orbis_historical_agg_allyears_allvariables.dta", replace





*ABOVE this line ORBIS is cleaned using the very last batch in 2021. 
*================================================================================================================================


*-----------------------------------------------------------------------
*ORBIS aggregate level data
*-----------------------------------------------------------------------
clear all
set more off
global typeden=0 
do "TFP_firm_directory_historical.do"
use "${output}/orbis_historical_agg.dta", clear
drop _TYPE_ _FREQ_
tostring industry_code, gen(NAICS)
drop industry_code

replace emp=emp/1000
foreach vv in turn exp_rev val_added {
replace `vv'=`vv'/1000000
}
*
rename turn GO_usd
rename emp EMPE
rename val_added VA_usd
rename exp_rev EXP_usd


global cond1 `"  isocode=="AR" | isocode=="AT" | isocode=="AU" | isocode=="BE" | isocode=="BG" | isocode=="BR" | isocode=="CA" |  isocode=="CH" | isocode=="CL" | isocode=="CN" | isocode=="CY" | isocode=="CO" | isocode=="CZ" |  isocode=="DE" |  isocode=="DK" |  isocode=="EE" |  isocode=="ES" |  isocode=="FI" |  isocode=="FR" |  isocode=="GB" |  isocode=="GR" |  isocode=="HK" |  isocode=="HU" |  isocode=="HR" |  isocode=="ID" |  isocode=="IL" |  isocode=="IE" | isocode=="IN" |  isocode=="IS" |  isocode=="IT" |  isocode=="JP" |  isocode=="KR" | isocode=="LT" | isocode=="LV" | isocode=="LU" |  isocode=="MX" |  isocode=="MT" |  isocode=="MY" |  isocode=="NL" |   isocode=="NO" |  isocode=="NZ" |  isocode=="PE" |  isocode=="PL" |  isocode=="PT" |  isocode=="RO" |  isocode=="RS" |   isocode=="RU" | isocode=="SE" |  isocode=="SG" | isocode=="SI" | isocode=="SK" | isocode=="TH" | isocode=="TR" | isocode=="TW" | isocode=="UA" |  isocode=="US" | isocode=="VE" |  isocode=="ZA"   "'
gen a=($cond1)
tab a
keep if a==1
drop a

*Sector classification* 
*--------------------------------------------------------------------
replace NAICS="" if NAICS=="."
rename NAICS NAICS4
gen NAICS2=substr(NAICS4,1,2)
gen NAICS3=substr(NAICS4,1,3)

gen sector="NA"
replace sector="NA" if NAICS2=="" | NAICS2=="NA"
replace sector="Agriculture (A)" if NAICS2=="11"
replace sector="Mining (B)" if NAICS2=="21"

replace sector="Food (10-12)" if NAICS3=="311" | NAICS3=="312"
replace sector="Textiles (13-15)" if NAICS3=="313" | NAICS3=="314" | NAICS3=="315" | NAICS3=="316"
replace sector="Wood (16-18)" if NAICS3=="321" | NAICS3=="322" | NAICS3=="323"
replace sector="Petroleum (19)" if NAICS3=="324"
replace sector="Chemicals (20-21)" if NAICS3=="325"
replace sector="Plastic (22-23)" if NAICS3=="326" | NAICS3=="327"
replace sector="Basic Metals (24-25)" if NAICS3=="331" | NAICS3=="332"
replace sector="Electrical_Equip (26-27)" if NAICS3=="334" |  NAICS3=="335" 
replace sector="Machinery (28)" if NAICS3=="333" 
replace sector="Transport (29-30)" if NAICS3=="336"
replace sector="Other_Manufacturing (31-33)" if NAICS3=="337" | NAICS3=="339"

replace sector="Electricity (D-E)" if NAICS2=="22"
replace sector="Construction (F)" if NAICS2=="23"
*replace sector="Wholesale_Retail (G)" if NAICS2=="41" | NAICS2=="42" | NAICS2=="44" | NAICS2=="45"
replace sector="Wholesale_Retail (G)" if NAICS2=="41" | NAICS2=="44" | NAICS2=="45"

replace sector="Transportation_Storage (H)" if NAICS2=="48" | NAICS2=="49"
replace sector="Accomodation (I)" if NAICS2=="72"
replace sector="Information (J)" if  NAICS2=="51"
replace sector="Financial_Insurance (K)" if  NAICS2=="52"
replace sector="Real_Estate (L)" if  NAICS2=="53"
replace sector="Support_Services (M-N)" if NAICS2=="54" | NAICS2=="55" | NAICS2=="56"
replace sector="Public_Administration (O)" if NAICS2=="92"
replace sector="Education (P)" if  NAICS2=="61"
replace sector="Health (Q)" if  NAICS2=="62"
replace sector="Recreation (R-S)" if NAICS2=="71" | NAICS2=="81"

tab sector 
compress
tempfile temp1
save `temp1', replace


*Manufacturing* 
*------------------------------
clear all
use `temp1', clear
gen sector1="NA"
replace sector1="Manufacturing (C)" if sector=="Food (10-12)"
replace sector1="Manufacturing (C)" if sector=="Textiles (13-15)"
replace sector1="Manufacturing (C)" if sector=="Wood (16-18)"
replace sector1="Manufacturing (C)" if sector=="Petroleum (19)"
replace sector1="Manufacturing (C)" if sector=="Chemicals (20-21)"
replace sector1="Manufacturing (C)" if sector=="Plastic (22-23)"
replace sector1="Manufacturing (C)" if sector=="Basic Metals (24-25)"
replace sector1="Manufacturing (C)" if sector=="Electrical_Equip (26-27)"
replace sector1="Manufacturing (C)" if sector=="Machinery (28)"
replace sector1="Manufacturing (C)" if sector=="Transport (29-30)"
replace sector1="Manufacturing (C)" if sector=="Other_Manufacturing (31-33)"
keep if sector1=="Manufacturing (C)"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_manuf
save `temp1_manuf', replace


*Marketing Services
*------------------------------
clear all
use `temp1', clear
gen sector1="NA"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Wholesale_Retail (G)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Transportation_Storage (H)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Accomodation (I)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Information (J)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Financial_Insurance (K)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Support_Services (M-N)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Recreation (R-S)"
replace sector1="Market_Services (G-H-I-J-K-M-N-R-S-T)" if sector=="Activity_households (T)"
keep if sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_serv
save `temp1_serv', replace


*Other Goods
*------------------------------
clear all
use `temp1', clear
gen sector1="NA"
replace sector1="Other_Goods (A-B-D-E-F)" if sector=="Agriculture (A)"
replace sector1="Other_Goods (A-B-D-E-F)" if sector=="Mining (B)"
replace sector1="Other_Goods (A-B-D-E-F)" if sector=="Electricity (D-E)"
replace sector1="Other_Goods (A-B-D-E-F)" if sector=="Construction (F)"
keep if sector1=="Other_Goods (A-B-D-E-F)"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_other
save `temp1_other', replace


*Market Economy 
*------------------------------
use `temp1_other', clear
append using `temp1_serv'
append using `temp1_manuf'
gen sector1="Market (MARKT)"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_market
save `temp1_market', replace


*Non Market Economy 
*------------------------------
clear all
use `temp1', clear
gen sector1="NA"
replace sector1="Non-Market Economy" if sector!="NA" & sector1=="NA"
keep if sector1=="Non-Market Economy"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_nonmarket
save `temp1_nonmarket', replace


*Total  
*------------------------------
use `temp1_market', clear
append using `temp1_nonmarket'
gen sector1="Total (TOT)"
collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector1)
rename sector1 sector
tempfile temp1_total
save `temp1_total', replace


*Append
*-----------------------------
use `temp1', clear
append using `temp1_manuf'
append using `temp1_serv'
append using `temp1_other'
append using `temp1_nonmarket'
append using `temp1_total'
append using `temp1_market'

drop if sector=="NA" | sector==""

collapse (sum) GO_usd VA_usd EMPE EXP_usd, by(year isocode sector)
foreach vv in GO_usd VA_usd EMPE EXP_usd {
display "`vv'"
replace `vv'=. if `vv'==0 
}
label variable GO_usd "Gross output at current prices (MM USD)"
label variable VA_usd "Value added, current prices (MM USD)"
label variable EMPE "Number of employees (Thousands)"
label variable EXP_usd "Exports, current prices (MM USD)"

drop if isocode=="CY" | isocode=="ID" | isocode=="IL"| isocode=="MT" | isocode=="MY" | isocode=="VE" | isocode=="ZA"   
drop if GO_usd==. & VA_usd==. &  EMPE==. &  EXP_usd==.
format %9.0fc GO_usd VA_usd EXP_usd
format %9.2fc EMPE
sort year isocode sector
compress 
save "${output}/orbis_historical_agg_allyears.dta", replace


*******************************************************************************************
*Having ORBIS as aggregates (to re-estimate the dummies) 
*******************************************************************************************
global typeden=1
include "TFP_firm_directory_historical.do"

clear all
use "${output}/orbis_historical_agg_allyears.dta", clear
foreach var of varlist GO_usd VA_usd EMPE EXP_usd {
rename `var' `var'_orbis
}
compress
save "${output}/orbis_historical_agg_allyears_forreg.dta", replace









